## Keefer 2007 IO

library(foreign)
library(Amelia)

## Load the original replication dataset (Stata .dta file) and inspect it
k2007 <- read.dta("K2007 IO Rep Data.dta")
head(k2007)
dim(k2007)

## Subset data to include only crises, i.e. rows with fcostgdp > 0.
## which() also drops rows where fcostgdp is missing.
k2007 <- k2007[which(k2007$fcostgdp > 0), ]

## Due to the small sample size, only the variables used in the analysis
## model can be included in the imputation model
keep <- c(
  "ifs", "fcostgdp", "checkav33_res", "eiecav33", "stabsav_3",
  "reser_debt_1", "curr_acct_1", "curr_acctdel_1", "gdpgav_3", "gdpav33",
  "terms_tradedel_1"
)
k2007 <- k2007[keep]

## Imputation: append a running row number as the column `case`.
## seq_len() is used rather than 1:nrow() so that an empty subset gives an
## empty index instead of c(1, 0).
case <- seq_len(nrow(k2007))
k2007 <- cbind(k2007, case)
head(k2007)
dim(k2007)

## What is the average percentage of missing data across variables?
## Count the missing values in each column of x (a data frame or matrix).
## Returns an unnamed integer vector with one entry per column.
NAs <- function(x) {
  na_flags <- is.na(x)
  as.integer(colSums(na_flags))
}
## Missing values per column, then the mean share of missing values across
## all columns of k2007 (in percent)
n_obs <- nrow(k2007)
NAs(k2007)
100 * mean(NAs(k2007) / n_obs)

## Thus: 5 imputations

## Fix the RNG seed before imputing so the run can be repeated
set.seed(02138)

## Impute with "ifs" as the cross-section variable and a ridge prior (empri)
## set to 10% of the number of rows.
## NOTE(review): `case` is not passed via `idvars`, so it presumably enters
## the imputation model as an ordinary variable -- confirm this is intended.
k2007.out <- amelia(
  k2007,
  m = 5,
  cs = "ifs",
  empri = 0.1 * n_obs
)

## Write all imputations to a single Stata file (separate = FALSE).
## NOTE(review): file.stem already ends in ".dta"; write.amelia() may append
## the format extension again -- verify the name of the file actually written.
write.amelia(
  obj = k2007.out,
  file.stem = "K2007 IO Imp Data.dta",
  format = "dta",
  separate = FALSE
)